
sink(here("..", "results", "log_SAR_plot.txt"))


######################################################################
######################################################################
######################################################################
######  This file contains code to create plots                 ######
######      SAR DGP estimated with wrong W                       #####
######     in Betz, Cook, Hollenbach 2019                       ######
#### Bias due to network misspecification under spatial dependence ###
######################################################################
######################################################################


tic("SAR_plot")

theme_plot <- function(...) {
  theme_minimal() +
    theme(
      text = element_text(color = "#22211d"),
      axis.line = element_blank(),
      axis.text.x = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks = element_blank(),
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      ...
    )
}


### load knn & row normalized simulation data
load(here("..", "data", "misspecified_W_sar_row.rda"))
names(simulation_misspecified_W_sar_row)

##### prep for plot
plot_data <- simulation_misspecified_W_sar_row %>%
  dplyr::select(c(rho_y, rho_x, prob_misspecified, coef, method)) %>%
  mutate(method2 = paste(method, prob_misspecified, sep = "-"),
         method2 = case_when(method2 == "LM-NA" ~ "LM",
                             TRUE ~ method2),
         method3 = "OLS")

#### create plot and save
p <- ggplot(data = filter(plot_data, method == "SAR"))
p <- p + geom_line(aes(x = coef, colour = as.integer(prob_misspecified * 100), group = prob_misspecified), stat = "density") + facet_grid(rho_y ~ rho_x, labeller = label_bquote(cols = rho[x] ~ "=" ~ .(rho_x), rows = rho[y] ~ "=" ~ .(rho_y)))
p  <- p + theme_plot() + labs(title = "", y="Density", x = "Coefficient Estimates")
p <- p + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
p  <- p + theme(axis.text.x=element_text(size=15, face = "bold")) + theme(axis.text.y=element_text(size=15, face = "bold")) + theme(axis.title.x = element_text(size = 22),axis.title.y = element_text(size = 22)) + theme(strip.text.x = element_text(size=14, face = "bold"), strip.text.y = element_text(size=12, face="bold", angle = 45))
p <- p + geom_vline(aes(xintercept = 2), color = "black", size = 0.5)
p <- p + geom_line(data = filter(plot_data, method2 == "LM"),stat = "density",aes(x = coef), color = "black", size = 1, alpha = 0.5) + scale_colour_gradient(name = "Misspecification \nProbability",low = "#fdd49e", high = "#7f0000", breaks = c(0, 25, 50, 75, 100), labels = c("0", "0.25", "0.5", "0.75", "1")) + scale_linetype_discrete(name = "")
p  <- p + theme(legend.position = "right", legend.direction = "vertical",legend.key = element_blank(), legend.background = element_rect(colour = "white"), legend.text = element_text(size = 16), legend.title = element_text(size = 16), panel.border = element_rect(colour = "black", fill=NA, size=1.5)) + scale_x_continuous(limits = c(1,3.5))
ggsave(here("..", "results", "FigureC1.pdf"),plot = p, width = 12.944, height = 8)


##### load knn & min-max normalization data
load(here("..", "data", "misspecified_W_sar_minmax.rda"))


## prep for plot
plot_data <- simulation_misspecified_W_sar_minmax %>%
  dplyr::select(c(rho_y, rho_x, prob_misspecified, coef, method)) %>%
  mutate(method2 = paste(method, prob_misspecified, sep = "-"),
         method2 = case_when(method2 == "LM-NA" ~ "LM",
                             TRUE ~ method2),
         method3 = "OLS")

#### create plot and save
p <- ggplot(data =  dplyr::filter(plot_data, method == "SAR"))
p <- p + geom_line(aes(x = coef, colour = as.integer(prob_misspecified * 100), group = prob_misspecified), stat = "density") + facet_grid(rho_y ~ rho_x, labeller = label_bquote(cols = rho[x] ~ "=" ~ .(rho_x), rows = rho[y] ~ "=" ~ .(rho_y)))
p  <- p + theme_plot() + labs(title = "", y="Density", x = "Coefficient Estimates")
p <- p + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
p  <- p + theme(axis.text.x=element_text(size=15, face = "bold")) + theme(axis.text.y=element_text(size=15, face = "bold")) + theme(axis.title.x = element_text(size = 22),axis.title.y = element_text(size = 22)) + theme(strip.text.x = element_text(size=14, face = "bold"), strip.text.y = element_text(size=12, face="bold", angle = 45))
p <- p + geom_vline(aes(xintercept = 2), color = "black", size = 0.5)
p <- p + geom_line(data =  dplyr::filter(plot_data, method2 == "LM"),stat = "density",aes(x = coef), color = "black", size = 1, alpha = 0.5) + scale_colour_gradient(name = "Misspecification \nProbability",low = "#fdd49e", high = "#7f0000", breaks = c(0, 25, 50, 75, 100), labels = c("0", "0.25", "0.5", "0.75", "1")) + scale_linetype_discrete(name = "")
p  <- p + theme(legend.position = "right", legend.direction = "vertical",legend.key = element_blank(), legend.background = element_rect(colour = "white"), legend.text = element_text(size = 16), legend.title = element_text(size = 16), panel.border = element_rect(colour = "black", fill=NA, size=1.5)) + scale_x_continuous(limits = c(1,3.5))
ggsave(here("..", "results", "Figure1.pdf"),plot = p, width = 12.944, height = 8)



#### calculation of predicted bias and simulated bias
simulation_misspecified_W_sar <- simulation_misspecified_W_sar_minmax %>%
  mutate(ratio = covXWY / varX)

summary(simulation_misspecified_W_sar)


predict_beta1 <- simulation_misspecified_W_sar  %>%
  dplyr::filter(method == "SAR" & rho_x %in% c(0, 0.3, 0.6) & rho_y %in% c(0, 0.3, 0.6) & prob_misspecified == 0) %>%
  dplyr::select(c(coef,  rho_y, rho_x, covXWY, varX, ratio)) %>%
  group_by(rho_x,  rho_y) %>%
  summarize_all(mean) %>%
  ungroup()

predict_beta2 <-  simulation_misspecified_W_sar  %>%
  dplyr::filter(method == "LM" & rho_x %in% c(0, 0.3, 0.6) &  rho_y %in% c(0, 0.3, 0.6)) %>%
  dplyr::select(c(coef,  rho_y, rho_x)) %>%
  group_by(rho_x,  rho_y) %>%
  summarize_all(mean) %>%
  ungroup() %>%
  rename(OLScoef = coef)

predict_beta  <- left_join(predict_beta1, predict_beta2, by = c("rho_x", "rho_y")) %>%
  mutate(expected_beta = 2 + (ratio * rho_y)) %>%
  dplyr::select(-c(coef))



predict_beta  <- bind_cols(predict_beta[, c(1:5)], predict_beta[, 7], predict_beta[, 6])

#### Table C.1
### change column names by hand and move caption up to
tab  <- xtable(predict_beta,  caption = "Analytical beta \\& Mean beta in Simulations -- SAR", label = "tab:sar_beta", align = "cccccccc")
print(tab, include.rownames = FALSE, booktabs = TRUE, file = here("..", "results", "TableC1.txt"))


#### average direct effect
load(here("..", "data", "DGP_W_sar_minmax.rda"))


sum(diag(solve(diag(150) - (0.3*dgpW_norm)) * 2)) / 150
sum(diag(solve(diag(150) - 0.6 * dgpW_norm) * 2)) / 150

toc()

sink()
